packages <- c("CIMseq", "CIMseq.testing", "tidyverse", "circlize", "printr")
purrr::walk(packages, library, character.only = TRUE)
rm(packages)

##DATA
load('../data/CIMseqData.rda')
load('../data/sObj.rda')
load('../data/seuratDE.rda')
if(!dir.exists('../figures')) dir.create('../figures')

#there are 5 cells that were classified as colon but sorted as SI. These have to
#be removed manually
c <- getData(cObjSng, "classification")
s <- names(c[c %in% c("4", "9")])
i <- which(colnames(getData(cObjSng, "counts")) %in% s)
cObjSng <- CIMseqSinglets(
  getData(cObjSng, "counts")[, -i],
  getData(cObjSng, "counts.ercc")[, -i],
  getData(cObjSng, "dim.red")[-i, ],
  getData(cObjSng, "classification")[-i]
)

Fig 1: Classes

p <- plotUnsupervisedClass(cObjSng, cObjMul, palette('c'))
p

ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.classes.pdf',
  device = cairo_pdf,
  height = 180,
  width = 180,
  units = "mm"
)

Fig 2: Cell type gene expression

p <- plotUnsupervisedMarkers(
  cObjSng, cObjMul,
  c("Lgr5", "Ptprc", "Chga", "Dclk1", "Slc26a3", "Atoh1"),
  pal = RColorBrewer::brewer.pal(8, "Set1")
)
p

ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.markers.pdf',
  device = cairo_pdf,
  height = 180,
  width = 180,
  units = "mm"
)

Fig 3: Cell cycle and Plet1

p <- plotUnsupervisedMarkers(
  cObjSng, cObjMul, c("Mki67"),
  pal = RColorBrewer::brewer.pal(8, "Set1")
)
p

ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.Mki67.pdf',
  device = cairo_pdf,
  height = 180,
  width = 180,
  units = "mm"
)
#Shown in differential expression analysis instead
# p <- plotUnsupervisedMarkers(
#   cObjSng, cObjMul, c("Plet1"),
#   pal = RColorBrewer::brewer.pal(8, "Set1")
# )
# p
# ggsave(
#   plot = p,
#   filename = '../figures/MGA.enge20.Plet1.pdf',
#   device = cairo_pdf,
#   height = 180,
#   width = 180,
#   units = "mm"
# )

Fig 4: Connections per multiplet

adj <- adjustFractions(cObjSng, cObjMul, sObj)
as.data.frame(table(apply(adj, 1, sum)))
Var1 Freq
0 142
1 571
2 588
3 268
4 94
5 32
6 5
7 3

Fig 5: Fraction histogram

tibble(fractions = c(fractions)) %>%
  ggplot() +
  geom_histogram(aes(fractions), binwidth = 0.01) +
  theme_bw()

Fig 6: Detected cell types vs. cost

tibble(
  nCellTypes = apply(adj, 1, sum),
  cost = getData(sObj, "costs")
) %>%
  ggplot() +
  geom_boxplot(aes(nCellTypes, cost, group = nCellTypes)) +
  scale_x_continuous(name = "Detected cell types", breaks = 0:max(apply(adj, 1, sum))) +
  theme_bw()

Fig 7: Estimated cell numbers vs. cost

tibble(
  sample = names(getData(sObj, "costs")),
  cost = unname(getData(sObj, "costs"))
) %>%
  inner_join(
    select(estimateCells(cObjSng, cObjMul), sample, estimatedCellNumber), 
    by = "sample"
  ) %>%
  mutate(estimatedCellNumber = round(estimatedCellNumber)) %>%
  ggplot() +
  geom_boxplot(aes(estimatedCellNumber, cost, group = estimatedCellNumber)) +
  scale_x_continuous(
    name = "ERCC estimated cell number", 
    breaks = 0:max(round(pull(estimateCells(cObjSng, cObjMul), estimatedCellNumber)))
  ) +
  theme_bw()

Fig 8: Estimated cell number vs. Detected cell number

ercc <- filter(estimateCells(cObjSng, cObjMul), sampleType == "Multiplet")
nConnections <- apply(adj, 1, sum)
nConnections <- nConnections[match(ercc$sample, names(nConnections))]
tibble(
  detectedConnections = round(nConnections),
  estimatedCellNumber = round(ercc$estimatedCellNumber)
) %>%
  ggplot() +
  geom_boxplot(aes(estimatedCellNumber, detectedConnections, group = estimatedCellNumber)) +
  scale_x_continuous(
    name = "ERCC estimated cell number", 
    breaks = 0:max(round(ercc$estimatedCellNumber))
  ) +
  scale_y_continuous(
    name = "Detected cell number",
    breaks = 0:max(round(nConnections))
  ) +
  theme_bw()

Fig 9: Detected cell number vs. Total counts

tibble(
  sample = names(nConnections),
  detectedConnections = nConnections
) %>%
  inner_join(tibble(
    sample = colnames(getData(cObjMul, "counts")),
    total.counts = colSums(getData(cObjMul, "counts"))
  ), by = "sample") %>%
  ggplot() +
  geom_boxplot(aes(detectedConnections, total.counts, group = detectedConnections)) +
  scale_x_continuous(
    name = "Detected cell number", 
    breaks = 0:max(nConnections)
  ) +
  scale_y_continuous(name = "Total counts") +
  theme_bw()

Fig 10: Detected cell number vs. Total ERCC counts

tibble(
  sample = names(nConnections),
  detectedConnections = nConnections
) %>%
  inner_join(tibble(
    sample = colnames(getData(cObjMul, "counts")),
    total.ercc = colSums(getData(cObjMul, "counts.ercc"))
  ), by = "sample") %>%
  ggplot() +
  geom_boxplot(aes(detectedConnections, total.ercc, group = detectedConnections)) +
  scale_x_continuous(
    name = "Detected cell number", 
    breaks = 0:max(nConnections)
  ) +
  scale_y_continuous(name = "Total ERCC counts") +
  theme_bw()

Fig 11: Relative frequency of singlets vs. deconvoluted multiplets

singlets <- c(table(getData(cObjSng, "classification")))
singlets <- singlets / sum(singlets)
deconv <- colSums(adjustFractions(cObjSng, cObjMul, sObj))
deconv <- deconv[match(names(singlets), names(deconv))]
deconv <- deconv / sum(deconv)
if(!identical(names(singlets), names(deconv))) stop("name mismatch")

p <- tibble(
  class = names(singlets),
  singlet.freq = singlets,
  multiplet.freq = deconv
) %>%
  ggplot() +
  geom_point(aes(singlet.freq, multiplet.freq, colour = class), size = 3) +
  scale_colour_manual(values = palette('c')[order(names(palette('c')))]) +
  xlim(min(c(deconv, singlets)), max(c(deconv, singlets))) +
  ylim(min(c(deconv, singlets)), max(c(deconv, singlets))) +
  geom_abline(slope = 1, intercept = 0, lty = 3, colour = "grey") +
  labs(x = "Singlet relative frequency", y = "Multiplet relative frequency") +
  guides(colour = guide_legend(title = "Cell Type")) +
  theme_bw()

p

ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.sngMulRelFreq.pdf',
  device = cairo_pdf,
  height = 180,
  width = 180,
  units = "mm"
)

Fig 12: All connections

plotSwarmCircos(
  sObj, cObjSng, cObjMul, classOrder = classOrder.MGA('c'), 
  classColour = palette('c')[classOrder.MGA('c')], h.ratio = 0.85
)
## Joining, by = "class"

Fig 13: Filtered

Only detected duplicates, triplicates, and quadruplicates.
ERCC estimated cell number set to max 4.
Weight cutoff = 10. Alpha = 1e-3.

# adj <- adjustFractions(cObjSng, cObjMul, sObj, binary = TRUE)
# samples <- rownames(adj)
# rs <- rowSums(adj)
# keep <- rs == 2 | rs == 3 | rs == 4

plotSwarmCircos(
  sObj, cObjSng, cObjMul, weightCut = 10, 
  classOrder = classOrder.MGA('c'), theoretical.max = 4, classColour = palette('c')[classOrder.MGA('c')], 
  h.ratio = 0.85, alpha = 1e-3
)
## Joining, by = "class"

pdf('../figures/MGA.enge20.circos.pdf', width = 9.5, height = 9.5, onefile=FALSE)
plotSwarmCircos(
  sObj, cObjSng, cObjMul, weightCut = 10, 
  classOrder = classOrder.MGA('c'), theoretical.max = 4, classColour = palette('c')[classOrder.MGA('c')], 
  h.ratio = 0.85, alpha = 1e-3
)
## Joining, by = "class"
dev.off()
## quartz_off_screen 
##                 2

Calculate the probability of observing Lgr5 expression when Plet1 is or is not expressed in Muc2 high expressing multiplets.

p <- getData(cObjMul, "counts.cpm") %>% 
  .[c("Plet1", "Lgr5", "Muc2"), ] %>%
  t() %>%
  matrix_to_tibble("sample") %>%
  filter(Muc2 > 3000) %>% #include only Muc2 high
  mutate(
    express.plet1 = if_else(Plet1 > 0, TRUE, FALSE),
    express.lgr5 = if_else(Lgr5 > 0, TRUE, FALSE)
  ) %>%
  group_by(express.plet1, express.lgr5) %>%
  summarize(n = n()) %>%
  ungroup() %>%
  group_by(express.plet1) %>%
  mutate(total = sum(n)) %>%
  ungroup() %>%
  mutate(lgr5.prob = n / total) %>%
  filter(express.lgr5) %>%
  ggplot() + 
  geom_bar(aes(express.plet1, lgr5.prob), stat = "identity", position = position_dodge(width = 1)) +
  labs(x = "Plet1 expressed", y = "Lgr5 probability") +
  ggthemes::theme_few()

p

ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.Lgr5prob.pdf',
  device = cairo_pdf,
  height = 180,
  width = 90,
  units = "mm"
)
pdata <- adjustFractions(cObjSng, cObjMul, sObj, theoretical.max = 4) %>%
  matrix_to_tibble("sample") %>%
  filter(`Goblet Plet1 1` == 1) %>%
  select(-`Goblet Plet1 1`) %>%
  gather(class, binary, -sample) %>%
  group_by(sample) %>%
  summarize(others = paste(class[binary == 1], collapse = ", ")) %>%
  mutate(others = map(others, ~str_split(.x, ", ")[[1]])) %>%
  unnest() %>%
  filter(others != "") %>%
  group_by(others) %>%
  summarize(prob = n() / nrow(.)) %>% 
  rename(class = others) %>%
  full_join(tibble(class = unique(getData(cObjSng, "classification")))) %>%
  filter(class != "Goblet Plet1 1") %>%
  replace_na(list(prob = 0))

p <- pdata %>% 
  ggplot() +
  geom_bar(aes(class, prob), stat = "identity", position = position_dodge(width = 1)) +
  geom_text(aes(class, prob + 0.01, label = round(prob, digits = 3))) +
  theme_bw() +
  labs(y = "Probability") +
  theme(axis.title.x = element_blank(), axis.text.x = element_text(angle = 90))

p
ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.PletIntProb.pdf',
  device = cairo_pdf,
  height = 240,
  width = 240,
  units = "mm"
)

Plot mean Lgr5 expression in Stem -> colonocyte differentiation trajectory.

classes <- c("Stem 1", "Stem 2", "Stem 3", "Transit amplifying", "Progenitor", "Colonocytes")
markers <- c("Lgr5", "Mki67", "Slc26a3")
gene.order <- markers
scale.func <- scale_radius
scale.min <- NA
scale.max <- NA
  
  
getData(cObjSng, "counts.cpm")[markers, ] %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Sample") %>%
  as_tibble() %>% 
  gather(gene, cpm, -Sample) %>%
  inner_join(tibble(
      Sample = colnames(getData(cObjSng, "counts")), 
      Classification = getData(cObjSng, "classification")
  )) %>%
  filter(Classification %in% classes) %>%
  group_by(gene, Classification) %>%
  summarize(mean = mean(cpm), pct = 100 * (length(which(cpm != 0)) / n())) %>%
  mutate(scaled.mean.exp = scale(mean)) %>%
  ungroup() %>%
  mutate(Classification = parse_factor(Classification, levels = classes)) %>%
  mutate(gene = parse_factor(gene, levels = rev(markers))) %>%
  ggplot() +
  geom_point(
      aes(Classification, gene, size = pct, colour = scaled.mean.exp)
  ) +
  scale_colour_gradient(low = "white", high = "darkred") +
  scale.func(range = c(0, 18), limits = c(scale.min, scale.max)) +
  guides(
    size = guide_legend(
      title = "% expressed", title.position = "top", title.hjust = 0.5
    ),
    colour = guide_colorbar(
      title = "Scaled mean expression", title.position = "top", 
      title.hjust = 0.5, barwidth = 10
    )
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1), 
    axis.title = element_blank(), 
    legend.position = "top",
    legend.justification = "center"
  )
## Joining, by = "Sample"

Differential expression Plet1 positive vs. Plet1 negative. Show top 25 DE genes.

data <- seuratDE %>% 
  rownames_to_column("gene") %>% 
  slice(1:25) %>%
  as.data.frame()

data
gene p_val avg_logFC pct.1 pct.2 p_val_adj
Plet1os 0 3.7486628 0.722 0.075 0
Plet1 0 3.9084046 0.770 0.157 0
Ddah1 0 2.1013108 0.827 0.177 0
Car8 0 3.7732619 0.693 0.070 0
B4galnt1 0 2.8126439 0.737 0.162 0
B4galnt2 0 1.4412589 0.916 0.548 0
Cgref1 0 0.9192514 0.955 0.855 0
Fabp2 0 1.5979479 0.830 0.357 0
Sec11c 0 0.5651860 1.000 0.977 0
Ccl9 0 2.3999766 0.624 0.125 0
Kcnh3 0 2.7078891 0.516 0.043 0
Muc2 0 0.7748334 0.994 0.971 0
Isx 0 4.2434932 0.442 0.014 0
Osr2 0 4.9788936 0.424 0.020 0
Lgals9 0 1.3359941 0.809 0.368 0
Aqp1 0 2.7265643 0.534 0.099 0
Mt3 0 1.8163239 0.621 0.159 0
2210407C18Rik 0 4.2631272 0.394 0.006 0
Ang 0 1.1171340 0.955 0.730 0
Pcsk1 0 2.9994274 0.445 0.038 0
Retnlb 0 2.7394048 0.594 0.186 0
Rnase4 0 0.7872851 0.982 0.925 0
Chn2 0 3.2365524 0.397 0.017 0
Guk1 0 0.6230898 0.982 0.890 0
Cd44 0 2.9540720 0.469 0.075 0
#plot selected
markers <- c("Plet1", "Ccl9", "Kcnh3", "Mt3", "Pcsk1", "Cgref1", "Cd44", "Lgals9", "Ang")
p <- plotUnsupervisedMarkers(cObjSng, cObjMul, markers) %>%
  plotData() %>%
  gather(gene, value, -Sample, -(`Sample type`:Colour)) %>%
  mutate(gene = parse_factor(gene, levels = markers)) %>%
  ggplot() +
  geom_point(aes(`dim.red dim 1`, `dim.red dim 2`, colour = value), size = 0.5) +
  scale_colour_viridis_c(option = "E") +
  facet_wrap(~gene, scales = "free") +
  ggthemes::theme_few() +
  theme(legend.position = "top") +
  guides(colour = guide_colourbar(
    title = "log2(CPM + 1) normalized to [0, 1]" ,
    title.position = "top", 
    title.hjust = 0.5,
    barwidth = 15
  ))

p

ggsave(
  plot = p,
  filename = '../figures/MGA.enge20.DE.pdf',
  device = cairo_pdf,
  height = 240,
  width = 240,
  units = "mm"
)
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] printr_0.1           circlize_0.4.8       forcats_0.4.0       
##  [4] stringr_1.4.0        dplyr_0.8.3          purrr_0.3.2         
##  [7] readr_1.3.1          tidyr_0.8.3          tibble_2.1.3        
## [10] ggplot2_3.2.1        tidyverse_1.2.1      CIMseq.testing_0.0.2
## [13] CIMseq_0.3.0.2      
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-141        matrixStats_0.55.0  lubridate_1.7.4    
##  [4] RColorBrewer_1.1-2  gmodels_2.18.1      httr_1.4.1         
##  [7] tools_3.6.1         backports_1.1.4     R6_2.4.0           
## [10] lazyeval_0.2.2      BiocGenerics_0.30.0 colorspace_1.4-1   
## [13] withr_2.1.2         tidyselect_0.2.5    gridExtra_2.3      
## [16] compiler_3.6.1      cli_1.1.0           rvest_0.3.4        
## [19] xml2_1.2.2          labeling_0.3        scales_1.0.0       
## [22] digest_0.6.20       rmarkdown_1.15      pkgconfig_2.0.2    
## [25] htmltools_0.3.6     highr_0.8           rlang_0.4.0        
## [28] GlobalOptions_0.1.0 ggthemes_4.2.0      readxl_1.3.1       
## [31] rstudioapi_0.10     shape_1.4.4         farver_1.1.0       
## [34] generics_0.0.2      jsonlite_1.6        gtools_3.8.1       
## [37] magrittr_1.5        Rcpp_1.0.2          munsell_0.5.0      
## [40] S4Vectors_0.22.1    viridis_0.5.1       stringi_1.4.3      
## [43] yaml_2.2.0          ggraph_2.0.0        MASS_7.3-51.4      
## [46] Rtsne_0.15          grid_3.6.1          parallel_3.6.1     
## [49] gdata_2.18.0        listenv_0.7.0       ggrepel_0.8.1      
## [52] crayon_1.3.4        lattice_0.20-38     graphlayouts_0.5.0 
## [55] haven_2.1.1         hms_0.5.1           zeallot_0.1.0      
## [58] knitr_1.24          pillar_1.4.2        igraph_1.2.4.1     
## [61] pso_1.0.3           future.apply_1.3.0  codetools_0.2-16   
## [64] stats4_3.6.1        glue_1.3.1          evaluate_0.14      
## [67] modelr_0.1.5        vctrs_0.2.0         tweenr_1.0.1       
## [70] cellranger_1.1.0    gtable_0.3.0        RANN_2.6.1         
## [73] polyclip_1.10-0     future_1.14.0       assertthat_0.2.1   
## [76] xfun_0.9            gridBase_0.4-7      ggforce_0.3.1      
## [79] broom_0.5.2         tidygraph_1.1.2     viridisLite_0.3.0  
## [82] globals_0.12.4